# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import datetime
import re

import scrapy
from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst, MapCompose, Join


def add_jobbole(value):
    """Return *value* with the literal suffix ``-bobby`` appended."""
    suffix = "-bobby"
    return value + suffix


def date_convert(value):
    """Drop every '·' from *value* and trim surrounding whitespace."""
    without_separator = value.replace('·', '')
    cleaned = without_separator.strip()
    return cleaned


def get_nums(value):
    """Return the first run of digits in *value* as an int, or 0 if none.

    ``re.search`` is used instead of ``re.match`` with a leading ``.*?``
    because ``.`` does not match newlines: text that starts with a line
    break before the number (e.g. ``"\\n 3 comments"``) would otherwise
    never match and yield 0.

    Args:
        value: Text that may contain a number.

    Returns:
        int: The first digit sequence found, or 0 when there is none.
    """
    # Raw string avoids the invalid "\d" escape in a normal string literal.
    found = re.search(r"\d+", value)
    return int(found.group()) if found else 0


def return_value(value):
    """Identity function: hand back *value* unchanged.

    Used in this module as a no-op processor wrapped in ``MapCompose``.
    """
    return value


def remove_comment_tags(value):
    """Blank out a tag that contains "评论"; pass any other tag through.

    Returns an empty string for a matching tag, otherwise *value* itself.
    """
    # NOTE(review): MapCompose only discards None results, so the empty
    # string presumably stays in the tag list -- confirm that is intended.
    return "" if "评论" in value else value


class FirstscrapyItem(scrapy.Item):
    """Scrapy item declaring three plain fields: title, link and desc."""

    title = scrapy.Field()
    link = scrapy.Field()
    desc = scrapy.Field()


class ArticleItemLoader(ItemLoader):
    """ItemLoader that uses TakeFirst as its default output processor."""

    # Applies to fields that do not declare their own output_processor.
    default_output_processor = TakeFirst()


class JobboleArticleItem(scrapy.Item):
    """Article item with per-field loader processors and a SQL insert builder.

    Fields declared without processors rely on whatever defaults the item
    loader in use provides.
    """

    title = scrapy.Field()
    # Each input value is cleaned by date_convert.
    create_date = scrapy.Field(
        input_processor=MapCompose(date_convert)
    )
    url = scrapy.Field()
    url_object_id = scrapy.Field()
    # A field-level output processor takes precedence over a loader-level
    # default (e.g. TakeFirst), so this field stays a list; get_insert_sql
    # reads its first element.
    front_image_url = scrapy.Field(
        output_processor=MapCompose(return_value)
    )
    front_image_path = scrapy.Field()
    # Counter fields: each scraped string is reduced to an int by get_nums.
    praise_nums = scrapy.Field(
        input_processor=MapCompose(get_nums)
    )
    comment_nums = scrapy.Field(
        input_processor=MapCompose(get_nums)
    )
    fav_nums = scrapy.Field(
        input_processor=MapCompose(get_nums)
    )
    # remove_comment_tags runs on every tag; the results are joined with ",".
    tags = scrapy.Field(
        input_processor=MapCompose(remove_comment_tags),
        output_processor=Join(",")
    )
    content = scrapy.Field()

    def get_insert_sql(self):
        """Build the INSERT statement and its parameters for this article.

        The statement inserts into the ``article`` table and, on a duplicate
        key, updates only the ``content`` column.

        Returns:
            tuple: ``(insert_sql, params)``; ``insert_sql`` uses ``%s``
            placeholders and ``params`` lists the values in column order.

        Raises:
            KeyError: If any field other than the two image fields has not
                been populated on the item.
        """
        insert_sql = """
            INSERT INTO `article` (`title`, `url`, `create_date`, `fav_nums`, `front_image_url`, `front_image_path`, `praise_nums`, `comment_nums`, `tags`, `content`)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE `content`=VALUES(`content`)
        """

        # Subscript access raises KeyError for an unpopulated field, which
        # would defeat the emptiness check; .get() lets the image fields be
        # treated as optional and fall back to an empty string.
        image_urls = self.get("front_image_url")
        front_image_url = image_urls[0] if image_urls else ""
        front_image_path = self.get("front_image_path", "")

        params = (self["title"], self["url"], self["create_date"], self["fav_nums"], front_image_url,
                  front_image_path, self["praise_nums"], self["comment_nums"], self["tags"], self["content"])

        return insert_sql, params


class QuestionItem(scrapy.Item):
    """Scrapy item for a question; count fields are parsed with get_nums."""

    zhihu_id = scrapy.Field()               # a specific string of digits (answer)
    topics = scrapy.Field()                 # tags
    url = scrapy.Field()                    # question link
    title = scrapy.Field()                  # question title
    content = scrapy.Field()                # question body text
    answer_num = scrapy.Field(              # number of answers
        input_processor=MapCompose(get_nums)
    )
    comments_num = scrapy.Field(             # number of comments
        input_processor=MapCompose(get_nums)
    )
    watch_user_num = scrapy.Field(          # number of followers
        input_processor=MapCompose(get_nums)
    )
    click_num = scrapy.Field(               # number of views
        input_processor=MapCompose(get_nums)
    )
    crawl_time = scrapy.Field()             # time of crawling


class AnswerItem(scrapy.Item):
    """Scrapy item for an answer; count fields are parsed with get_nums."""

    zhihu_id = scrapy.Field()       # a specific string of digits (answer)
    url = scrapy.Field()            # answer link
    question_id = scrapy.Field()    # id of the question this answer belongs to
    author_id = scrapy.Field()      # author id
    content = scrapy.Field()        # answer content
    parise_num = scrapy.Field(      # number of upvotes ("parise" [sic] is the actual field key)
        input_processor=MapCompose(get_nums)
    )
    comments_num = scrapy.Field(    # number of comments
        input_processor=MapCompose(get_nums)
    )
    create_time = scrapy.Field()    # time the answer was created
    update_time = scrapy.Field()    # time the answer was last updated
    crawl_time = scrapy.Field()     # time of crawling


